set more off

use "S:\Projects\Oxford - R&D Tax Credits\Data\Data refresh May 2014\ct600_fame_rdclaiming", clear
keep if year>=2005 & year<=2011

*Define variable lists with different source data
global smedat  ct_chargeable cost_under_sme_scheme r_and_d_tax_credit   ///
instalment_payments      actual_exp_on_rd r_and_d_repayable        trading_turnover     ///    
ct_rate r_and_d_enhanced_exp     exp_for_pure_deduction   sme_enhance_rate vaccine_research_exp  ///
exp_for_pure_payb_cred   sme_payable_credit_rate sme_claim_as_large_co   exp_for_comb_claim_ded  ///
payb_cred_for_sme_est r_and_d_sme_claim        exp_for_comb_claim_cr    credit_claim_exp_est ///
r_and_d_large_co_claim   cost_sme_dedn             profits_chargeable cost_sme_payb_cred r_and_d_sme_claim  ///
instalment_payments r_and_d_large_co_claim  

global lcdat sme_claim_as_large_co   instalment_payments  claim_by_sme_subcont ///
r_and_d_tax_credit      r_and_d_sme_claim   trading_turnover        actual_exp_on_rd r_and_d_repayable  ///
r_and_d_large_co_claim  large_co_ct_rate      r_and_d_enhanced_exp    profits_chargeable    ///
sme_ct_rate_subcont vaccine_research_exp    ct_chargeable           enhance_rate_large_co ///
r_and_d_sme_claim  r_and_d_large_co_claim  instalment_payments 


*Modify data set based variable names to conform with CT600 variable names
foreach xx in $smedat {
capture rename `xx'_smedat2 `xx'_sme2
capture rename `xx'_smedat `xx'_sme
}

foreach xx in $lcdat {
capture rename `xx'_lcdat2 `xx'_large_co2
capture rename `xx'_lcdat `xx'_large_co
}


*Check if data sets conform with each other
foreach size in sme large_co {
replace actual_exp_on_rd_`size'=0 if actual_exp_on_rd_`size'==. 
}

gen rd=actual_exp_on_rd_sme+actual_exp_on_rd_large_co

*merge with group id
merge m:1 identifier using "${hmrc_datadir}\ownership2\groupid_clean"
keep if _merge==3
drop _merge
bysort group year: egen rd_sum=total(rd)
bysort group year: egen turnover_sum=total(turnover_ct600)

gen rd_ratio=rd_sum/turnover_sum
sum rd_ratio, d
winsor rd_ratio, gen (rd_intensity) p(0.01)
sum rd_intensity, d

bysort group: egen rd_firm=mean(rd_intensity)
save "${hmrc_datadir}\grouprd_intensity", replace

qui use "${hmrc_datadir}\export_reg_final_ukmncs_fy", clear
merge m:1 identifier year using "${hmrc_datadir}\grouprd_intensity"
unique identifier if _merge==1
*693
unique identifier if _merge==2
*22,267
unique identifier if _merge==3
*563
drop if _merge==2
replace rd_intensity=0 if _merge==1
replace rd_firm=0 if _merge==1
drop _merge

save "${hmrc_datadir}\export_reg_final_ukmncs_fy_grouprd", replace
